/* Build an assignee-ID lookup for the USPTO (KR) assignee names.
   Input : uspto_namstand.dta, which can be made with the "nameonly_main.do"
           file in 3_MATCHING > 0.namstand, where the input is the list of
           USPTO KR assignee names ('engname') in assignee_uspto_kr.dta.
   Output: USPTO_KR_assg.dta (one row per standard_name with its assgid) and
           assgid.csv (assgid, standard_name, stem_name, wku). */
use uspto_namstand, clear

preserve

/* One row per standardized name; for repeated names the first row (and its
   stem_name) in the current order is the one that is kept. */
keep standard_name stem_name
duplicates drop standard_name, force

/* standard_name is unique at this point, so sorting on it gives a
   reproducible row order and therefore reproducible IDs. */
sort standard_name
generate assgid = _n

save USPTO_KR_assg, replace

restore

/* Attach the new ID to every row of the original data.
   keep(match) nogenerate replaces the former "keep if _ == 3" / "drop _m":
   the bare "_" abbreviation only resolves to _merge while no other variable
   starts with an underscore, and m:1 is the documented spelling of the
   many-to-one merge type. */
keep standard_name stem_name wku
merge m:1 standard_name using USPTO_KR_assg, keep(match) nogenerate

order assgid standard_name stem_name wku

outsheet using assgid.csv, comma replace


/* Link application numbers to US patent numbers through patent families and
   export the pairs to family_temp.csv (appnum, wku).
   family.dta should come from the 1_KIPRIS > Family folder. */
use family.dta, clear

/* Keep US family members only ("미국" = United States).
   The countryname literal used to be misspelled as "미귝", which is not a
   Korean word and so almost certainly never matched any row.
   NOTE(review): countrycode is compared with the Korean country name as
   well; confirm that this column really holds "미국" rather than e.g. "US". */
keep if countrycode == "미국" | countryname == "미국"
keep if inlist(literaturekind, "B1", "B2")
keep if familykind == "DOCDBFAMILY"

/* Strip the "US" marker so that the number can be stored numerically.
   "." replaces every occurrence instead of capping at an arbitrary 30.
   Note that destring leaves the variable as a string if any non-numeric
   characters remain after the replacement. */
replace familynumber = subinstr(familynumber, "US", "", .)
destring familynumber, replace

/* Keep only applications that also appear in krassg.dta.
   NOTE(review): krassg.dta is not created in this file; confirm its source. */
merge m:1 applicationnumber using krassg, keep(match) nogenerate
keep applicationnumber familynumber
rename (applicationnumber familynumber) (appnum wku)

/* Wide display format so long application numbers are not abbreviated. */
format appnum %15.0g

outsheet using family_temp.csv, comma replace


/* Compute a weight for every assignee row of an application and export the
   result to assg_weight.csv.
   assignee.dta should come from the 1_KIPRIS > Biblio folder. */
use assignee.dta, clear

keep appnum order code
rename code kiprisid

/* Largest "order" value within each application; every row of that
   application receives the reciprocal of it as its weight. */
by appnum, sort: egen max_order = max(order)
generate weight = 1 / max_order

/* The helper and the ordering column are not part of the export. */
drop max_order order

outsheet using assg_weight.csv, comma replace
